# Replication code for Merola and Helgason (2016)

# Load Packages
library(foreign)
library("grid") 
library(plyr)
library(coin)
library(texreg)
library(pbkrtest) # To do KR approximation to DFs in mixed models
library(bda) # For Fischer-Pitman test
library(ggplot2)
  # Setting visualization parameters
  theme_set( theme_bw() )
library(coefplot)
library(scales) # to access the alpha function
library(Hmisc)
library(xtable)
library(lme4)
library(exactRankTests)

# Load Data
setwd("")
mh <- read.dta("data.dta",convert.factors = FALSE)


# Create a factor with the treatment variable and order it 
mh$tid.factor <- factor(mh$tid_all, labels= c( "Control", "Upturn", "Downturn", "POUM", "PODM", "Static"))
mh$tid.factor <- relevel(mh$tid.factor, "POUM")
mh$tid.factor <- relevel(mh$tid.factor, "Upturn")
mh$tid.factor <- relevel(mh$tid.factor, "Static")
mh$tid.factor <- relevel(mh$tid.factor, "Control")

library(car) # Only used to recode happiness var
mh$tback<-as.numeric(as.character(recode(mh$thinkback_n,"'Very unhappy'=1;
                                         'Moderately unhappy'=2;
                                         'Slightly unhappy'=3;
                                         'Neither happy nor unhappy'=4;
                                         'Slightly happy'=5;
                                         'Moderately happy'=6;
                                         'Very happy'=7")))

# Create subframe with maindata
mh1 <- subset(mh,select=c(sid,pid,treatment,period,ranking,tid,tid_all,tid.factor,tax,tback,partymain_n,ideology_n,egalitarianism,inc,female,lottery1_n,lottery2_n))
mh1 <- mh1[order(mh1$sid,mh1$pid,mh1$period),]

# Create aggregate data by subject
mh.agg = ddply(mh1, .(pid), summarise, avgrank=mean(ranking), treatment=mean(treatment), tid_all=mean(tid_all), avgtax=mean(tax),tback=mean(tback),sid=mean(sid),partymain_n=mean(partymain_n),ideology_n=mean(ideology_n),egalitarianism=mean(egalitarianism),inc=mean(inc),female=mean(female),lottery1_n=mean(lottery1_n),lottery2_n=mean(lottery2_n))

# Create a factor with the treatment variable and order it 
mh.agg$tid.factor <- factor(mh.agg$tid_all, labels= c( "Control", "Upturn", "Downturn", "POUM", "PODM", "Static"))
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "POUM")
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "Upturn")
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "Static")
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "Control")

### TABLE 1 - NON-PARAMETRIC TESTS

# Kruskal-Wallis test
kw.test=kruskal.test(avgtax~tid.factor,data=mh.agg)

# Pairwise comparisons
tindex = c(8,2,6,3,7)
results.all = matrix(NA,length(tindex),5)
results.all[,1] = tindex

for (i in 1:length(tindex)) {
  temp=wilcox.test(avgtax~tid.factor,data=subset(mh.agg, mh.agg$tid_all==1|mh.agg$tid_all==results.all[i,1]),conf.int=TRUE)
  results.all[i,2]=temp$estimate # Hodges-Lehmann Estimate
  results.all[i,c(3,4)]=temp$conf.int[1:2]
  results.all[i,5]=temp$p.value # Mann-Whitney U p-value
}

results.all = as.data.frame(results.all)
results.all$tid.factor = c( "Static", "Upturn", "POUM", "Downturn", "PODM")

# All of the estimates are specified in the wrong direction, i.e. their 
# sign is reversed. Multiply by -1 to correct
results.all[,2:4] = results.all[,2:4]*-1

# We do one sided test
results.all[,5]=results.all[,5]/2 

# Create dataframe for table
np.df <- data.frame(Treatment = c("Control",results.all$tid.factor),
                    Observations = c(length(mh1$tid.factor[mh1$tid.factor=="Control"])/10,
                                     length(mh1$tid.factor[mh1$tid.factor=="Static"])/10,
                                     length(mh1$tid.factor[mh1$tid.factor=="Upturn"])/10,
                                     length(mh1$tid.factor[mh1$tid.factor=="POUM"])/10,
                                     length(mh1$tid.factor[mh1$tid.factor=="Downturn"])/10,
                                     length(mh1$tid.factor[mh1$tid.factor=="PODM"])/10),
                    HL=c(NA,results.all$V2),
                    MWUPval = c(NA,results.all$V5))
colnames(np.df)=c("Treatment","N", "Hodges-Lehmann \n Estimate", "Mann-Whitney U \n P-value")

## PRODUCE TABLE 1
np = xtable(np.df, caption="Non-Parametric Tests", label="tab:np") 
align(np) = c("l","l","|","c",">{\\centering}p{1.5in}","c")
digits(np) = c(0,0,0,1,3)
names(np)=c("Treatment",
            "N",
            "\\multicolumn{1}{>{\\centering}p{1.4in}}{Hodges-Lehmann Estimate}",
            "\\multicolumn{1}{>{\\centering}p{1.2in}}{Mann-Whitney U P-value}")


comment          <- list()
comment$pos      <- list()
comment$pos[[1]] <- c(nrow(np))
comment$command[[1]]  <- c("\\hline \\multicolumn{4}{p{10cm}}{\\footnotesize{\\textbf{Notes:} All estimates and p-values are based on separate comparisons between each respective treatment condition and the \\textsl{Control} condition. The Hodges-Lehmann estimate is for the differences between medians  and the Mann-Whitney U test is a rank-based nonparametric test for the null hypothesis that both groups come from the same population. P-values shown are based on one-sided alternative hypotheses.}} \n")

print(np,include.rownames=FALSE, #Don't print rownames
      sanitize.colnames.function=function(x){x},
      include.colnames=TRUE, #We create them ourselves 
      caption.placement="top",
      tabular.environment="tabularx",
      add.to.row = comment,
      hline.after = c(-1, 0),
      width="0.7\\textwidth")


### TABLE 2 - MIXED EFFECTS ANALYSIS
mh$rank3=relevel(as.factor(mh$ranking),ref=3) # Set middle rank as reference group

mod1 = lmer(tax~tid.factor + (1|pid),data=mh)
mod2 = lmer(tax~tid.factor + rank3 + (1|pid),data=mh)
mod3 = lmer(tax~tid.factor + rank3 + female+inc+partymain_n+ ideology_n  + (1|pid),data=mh)

# P-values are problematic for mixed models. Use Kenward-Roger approximation.
df.KR1 <- get_ddf_Lb(mod1, fixef(mod1))
p.KR1 <- 2 * (1 - pt(abs(coef(summary(mod1))[,3]), df.KR1))

df.KR2 <- get_ddf_Lb(mod2, fixef(mod2))
p.KR2 <- 2 * (1 - pt(abs(coef(summary(mod2))[,3]), df.KR2))

df.KR3 <- get_ddf_Lb(mod3, fixef(mod3))
p.KR3 <- 2 * (1 - pt(abs(coef(summary(mod3))[,3]), df.KR3))

# PRODUCE TABLE 2
mod1.tr= extract.lme4(mod1, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod2.tr= extract.lme4(mod2, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod3.tr= extract.lme4(mod3, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)

# The following modifies the texreg output such that there is a line break in the custom note
body(texreg)[[48]] = substitute(if (is.null(custom.note)) {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 snote, "}}\n")
} else if (custom.note == "") {
  note <- ""
} else {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 custom.note, "}}\n")
  note <- gsub("%stars", snote, note, perl = TRUE)
})

texreg(list(mod1.tr,mod2.tr,mod3.tr),stars = c(0.05),naive=TRUE, label="tab:mix1",
       custom.note = "%stars. Standard errors in parentheses. The \\textsl{Control} condition is omitted from the model, meaning the coefficients for each treatment can be interpreted with respect to the \\textsl{Control} condition. \\citet{kenward97small} approximation for degrees of freedom used for hypothesis tests.",
       caption="Mixed Effects Models", caption.above=TRUE,booktabs=FALSE, dcolumn=FALSE,
       custom.coef.names = c("Constant", "Static", "Upturn","POUM","Downturn","PODM",
                             "Rank 1", "Rank 2", "Rank 4", "Rank 5", "Female",
                             "Family Income","Republican","Conservatism"),
       custom.gof.names=c("BIC","Observations","Subjects","Subject Variance","Residual Variance"),
       single.row=TRUE,float.pos = "!h",
       override.pval = list(p.KR1,p.KR2,p.KR3))


### FIGURE 1
tindex = c(8,2,6,3,7)
coefplot=matrix(NA,length(tindex),4)

coefplot[,2]=coef(summary(mod3))[2:6,1]
me = qt(0.975,df.KR3)+coef(summary(mod3))[2:6,2]
coefplot[,3:4]=coef(summary(mod3))[2:6,1]+c(-me,me) 
coefplot = as.data.frame(coefplot)
colnames(coefplot) = c("tid","point.estimate","CI.lower","CI.upper")

coefplot[,1]=factor(c("Static","Upturn","POUM","Downturn","PODM"),levels=c("PODM","Downturn","POUM","Upturn","Static"))
zp1 <- ggplot(coefplot) 
zp1 <- zp1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 1, lwd=1)
zp1 <- zp1 + geom_pointrange(aes(x = tid, y = point.estimate, ymin = CI.lower,
                                 ymax = CI.upper), lwd = 1, position = position_dodge(width = 1/2),colour="#000000")
zp1 <- zp1 + coord_flip() + theme_bw()
zp1 = zp1 + 
  labs(y="Tax Rate (%) - Change from Control", x="") + 
  theme(axis.title = element_blank()) +
  # theme(legend.position="bottom") +
  theme(legend.position="none") +
  scale_y_continuous(breaks=c(-15,-10,-5,0,5,10,15))


ggsave(
  "coefplot_main.png",
  zp1,
  width = 8,
  height = 6,
  dpi = 1200
)

### TABLE 3
mod9 = lmer(tax~tid.factor+(1|pid),data=subset(mh,mh$ranking==1|mh$ranking==2))
mod10 = lmer(tax~tid.factor +(1|pid),data=subset(mh,mh$ranking==3))
mod11 = lmer(tax~tid.factor +(1|pid),data=subset(mh,mh$ranking==4|mh$ranking==5))
mod9.tr= extract.lme4(mod9, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod10.tr= extract.lme4(mod10, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod11.tr= extract.lme4(mod11, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)

# P-values are problematic for mixed models. Use Kenward-Roger approximation.
# http://mindingthebrain.blogspot.com/2014/02/three-ways-to-get-parameter-specific-p.html
# http://seriousstats.wordpress.com/tag/mcmc-methods/
df.KR9 <- get_ddf_Lb(mod9, fixef(mod9))
p.KR9 <- 2 * (1 - pt(abs(coef(summary(mod9))[,3]), df.KR9))

df.KR10 <- get_ddf_Lb(mod10, fixef(mod10))
p.KR10 <- 2 * (1 - pt(abs(coef(summary(mod10))[,3]), df.KR10))

df.KR11 <- get_ddf_Lb(mod11, fixef(mod11))
p.KR11 <- 2 * (1 - pt(abs(coef(summary(mod11))[,3]), df.KR11))

# PRODUCE TABLE 3
# The following modifies the texreg output such that there is a line break in the custom note
body(texreg)[[48]] = substitute(if (is.null(custom.note)) {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 snote, "}}\n")
} else if (custom.note == "") {
  note <- ""
} else {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 custom.note, "}}\n")
  note <- gsub("%stars", snote, note, perl = TRUE)
})

texreg(list(mod9.tr,mod10.tr,mod11.tr),stars = c(0.05),naive=TRUE,label="tab:mix3",
       custom.note = "%stars. Standard errors in parentheses. The \\textsl{Control} condition is omitted from the model, meaning the coefficients for each treatment can be interpreted with respect to the \\textsl{Control} condition. \\citet{kenward97small} approximation for
       degrees of freedom used for hypothesis tests.",
       caption="Mixed Effects Models, by Rank", caption.above=TRUE,booktabs=FALSE, dcolumn=FALSE,
       custom.model.names=c("Rank 1 and 2","Rank 3","Rank 4 and 5"),
       custom.coef.names = c("Constant", "Static", "Upturn","POUM","Downturn","PODM"),
       custom.gof.names=c("BIC","Observations","Subjects","Subject Variance","Residual Variance"),
       single.row=TRUE,float.pos = "!h",
       override.pval = list(p.KR9,p.KR10,p.KR11))


### FIGURE 2

fig = matrix(NA,3,4)
fig[,1] = c(1,3,5)

# Rank 1, 2
fig[1,2]=coef(summary(mod9))[2,1]
me = qt(0.975,df.KR9)+coef(summary(mod9))[2,2]
fig[1,3:4]=coef(summary(mod9))[2,1]+c(-me,me) 

# Rank 3
fig[2,2]=coef(summary(mod10))[2,1]
me = qt(0.975,df.KR10)+coef(summary(mod10))[2,2]
fig[2,3:4]=coef(summary(mod10))[2,1]+c(-me,me) 

# Rank 4,5
fig[3,2]=coef(summary(mod11))[2,1]
me = qt(0.975,df.KR11)+coef(summary(mod10))[2,2]
fig[3,3:4]=coef(summary(mod11))[2,1]+c(-me,me) 

fig=as.data.frame(fig)
colnames(fig)=c("X","main","lb","ub")
#fig[,1] = factor(fig[,1],labels=c("Rank 1 and 2", "Rank 3", "Rank 4 and 5"))
h <- ggplot(fig, aes(x=X))
h = h + geom_ribbon(aes(ymin=lb, ymax=ub),fill="grey80") + theme_bw() +
  #geom_ribbon(aes(ymin=CIlow1, ymax=CIhi1),fill="grey40") +
  geom_abline(intercept = 0, slope=0, colour = "black", size = 1) +
  geom_line(aes(y=main)) +
  #geom_line(aes(y=LN5)) +
  scale_x_continuous(name="",breaks=c(1,3,5),labels=c("Rank 1 and 2", "Rank 3", "Rank 4 and 5")) +
  scale_y_continuous(name="Treatment Effect") +
  theme(axis.text=element_text(color="black"))

ggsave(
  "rankstatic.png",
  h,
  width = 7,
  height = 6,
  dpi = 1200
)


################################ APPENDIX CODE ###########################################

# Create subframe with maindata
mh1 <- subset(mh,select=c(sid,pid,treatment,period,ranking,tid,tid_all,tid.factor,tax,tback,partymain_n,ideology_n,egalitarianism,inc,female,lottery1_n,lottery2_n, man_a_n))
mh1 <- mh1[order(mh1$sid,mh1$pid,mh1$period),]



# Create aggregate data by subject
mh.agg = ddply(mh1, .(pid), summarise, avgrank=mean(ranking), treatment=mean(treatment), tid_all=mean(tid_all), avgtax=mean(tax),tback=mean(tback),sid=mean(sid),partymain_n=mean(partymain_n),ideology_n=mean(ideology_n),egalitarianism=mean(egalitarianism),inc=mean(inc),female=mean(female),lottery1_n=mean(lottery1_n),lottery2_n=mean(lottery2_n),man_a_n=mean(man_a_n))

# Create a factor with the treatment variable and order it 
mh.agg$tid.factor <- factor(mh.agg$tid_all, labels= c( "Control", "Upturn", "Downturn", "POUM", "PODM", "Static"))
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "POUM")
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "Upturn")
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "Static")
mh.agg$tid.factor <- relevel(mh.agg$tid.factor, "Control")

## A.1
# Table a
df <- data.frame(Treatment = levels(mh1$tid.factor),
                 Subjects = c(length(mh1$tid.factor[mh1$tid.factor=="Control"])/10,
                              length(mh1$tid.factor[mh1$tid.factor=="Static"])/10,
                              length(mh1$tid.factor[mh1$tid.factor=="Upturn"])/10,
                              length(mh1$tid.factor[mh1$tid.factor=="POUM"])/10,
                              length(mh1$tid.factor[mh1$tid.factor=="Downturn"])/10,
                              length(mh1$tid.factor[mh1$tid.factor=="PODM"])/10),
                 Rounds = rep(10,6),
                 Observations = c(length(mh1$tid.factor[mh1$tid.factor=="Control"]),
                                  length(mh1$tid.factor[mh1$tid.factor=="Static"]),
                                  length(mh1$tid.factor[mh1$tid.factor=="Upturn"]),
                                  length(mh1$tid.factor[mh1$tid.factor=="POUM"]),
                                  length(mh1$tid.factor[mh1$tid.factor=="Downturn"]),
                                  length(mh1$tid.factor[mh1$tid.factor=="PODM"])),
                 Median = c(median(mh1$tax[mh1$tid.factor=="Control"]),
                            median(mh1$tax[mh1$tid.factor=="Static"]),
                            median(mh1$tax[mh1$tid.factor=="Upturn"]),
                            median(mh1$tax[mh1$tid.factor=="POUM"]),
                            median(mh1$tax[mh1$tid.factor=="Downturn"]),
                            median(mh1$tax[mh1$tid.factor=="PODM"])))
colnames(df)=c("Treatment","Subjects", "Rounds", "Observations", "Median Tax (%)")

df[,2:5]=round(df[,2:5],0)

mainres= xtable(df, caption="Treatment Cnditions", label="tab:desc") 
align(mainres) = "ll|cccc"
digits(mainres) = rep(0,6)

comment          <- list()
comment$pos      <- list()
comment$pos[[1]] <- c(nrow(df))
comment$command  <- c("\\hline \\multicolumn{5}{c}{Item} \n")

print(mainres,include.rownames=FALSE, #Don't print rownames
      include.colnames=TRUE, #We create them ourselves 
      caption.placement="top",
      tabular.environment="tabularx",
      # add.to.row = comment,
      #hline.after = c(-1, 0),
      width="0.8\\textwidth")

# Table B
# Create dataframe for table
obs = subset(mh,select=c(tax,ranking))
sub = subset(mh.agg,select=c(avgtax,avgrank,female,inc,partymain_n,ideology_n))
descstats <- data.frame(Variable = c("Tax","Ranking", "Average Tax","Average Ranking","Female", "Family Income", "Republican", "Conservatism"),
                        Operationalization = c("Tax voted for (%)", "Ranking from 1st task (Ordinal)", "Average Tax Voted for Across 10 Rounds (%)","Average Ranking Across 10 Rounds (%)","Female? (Yes/No)", "Survey question: What is your family's annual income? (Ordinal)", "Survey Question: Generally speaking, do you usually think of yourself as a Republican, a Democrat, an Independent, or what? (Ordinal)", "Survey question: Here is a 7-point scale on which the political views that people might hold are arranged from extremely liberal to extremely conservative. Where would you place yourself on this scale? (Ordinal)"),
                        N = c(apply(obs,2,function(x) sum(table(x))),apply(sub,2,function(x) sum(table(x,useNA="ifany")))),
                        Min = c(apply(obs,2,min,na.rm=TRUE),apply(sub,2,min,na.rm=TRUE)),
                        Max = c(apply(obs,2,max,na.rm=TRUE),apply(sub,2,max,na.rm=TRUE)),
                        Mean = c(apply(obs,2,mean,na.rm=TRUE),apply(sub,2,mean,na.rm=TRUE)),
                        Median = c(apply(obs,2,median,na.rm=TRUE),apply(sub,2,median,na.rm=TRUE)),
                        SD = c(apply(obs,2,sd,na.rm=TRUE),apply(sub,2,sd,na.rm=TRUE)),
                        NAs = c(apply(obs,2,function(x) sum(is.na(x))),apply(sub,2,function(x) sum(is.na(x)))))                 

appds= xtable(descstats, caption="Detailed Descriptive Statistics", label="tab:descstats") 
align(appds) = "llp{4.5cm}ccccccc"
digits(appds) = c(0,0,0,0,1,1,2,1,1,1)

comment2          <- list()
comment2$pos      <- list()
comment2$pos[[1]] <- c(0)
comment2$pos[[2]] <- c(2)
comment2$command  <- c("\\hline \\multicolumn{2}{l}{\\textbf{Observation-level}} &&&&&&& \\\\",
                       "\\textbf{Subject-level} &&&&&&&& \\\\")

print(appds,include.rownames=FALSE, #Don't print rownames
      include.colnames=TRUE, #We create them ourselves 
      caption.placement="top",
      size="footnotesize",
      tabular.environment="tabularx",
      add.to.row = comment2,
      hline.after = c(-1,8),
      width="1\\textwidth")

# A.1.1
library(DataCombine)
library(plyr)
library(mosaic)

# Figure A
mh=mh[with(mh, order(pid, period)), ]
mhlead <- slide(mh, Var = "ranking", GroupVar = "pid",
                slideBy = +1)
mhlead <- slide(mhlead, Var = "tax", GroupVar = "pid",
                slideBy = +1)

mhlead = subset(mhlead,select=c("ranking","ranking1","tax","tax1"))
mhlead = subset(mhlead,complete.cases(mhlead))
mhlead$rankch=mhlead$`ranking1`-mhlead$ranking
mhlead$taxch=mhlead$tax-mhlead$`tax1`

var1=as.factor(mhlead$`ranking`)
var2=as.factor(mhlead$`ranking1`)
levVar1 <- length(levels(var1))
levVar2 <- length(levels(var2))

jointTable <- prop.table(table(var1, var2))
plotData <- as.data.frame(jointTable)
plotData=plotData[with(plotData, order(var1, var2)), ]
plotData$marginVar1 <- prop.table(table(var1))
plotData$marginVar2 <- prop.table(table(var2))
plotData$var2Height <- plotData$Freq / plotData$marginVar1
plotData$var2CumHeight1 <- c(0, cumsum(plotData$var2Height)[1:levVar2 -1]) 
plotData$var2CumHeight2<- c(cumsum(plotData$var2Height)[1:levVar2]) 
plotData$var2CumHeight <- rowMeans(cbind(plotData$var2CumHeight2,plotData$var2CumHeight1))
plotData$var1Center <- c(0, cumsum(plotData$marginVar1)[1:levVar1 -1]) +
  plotData$marginVar1 / 2

p=ggplot(plotData, aes(var1Center, var2Height))  + 
  geom_bar(stat = "identity", aes(width = marginVar1, fill = var1), col = "Black") + scale_fill_grey() +
  geom_text(aes(label = as.character(var2), x = var1Center, y = -0.05)) + theme(legend.position="none") +
  geom_text(aes(label = as.character(var2), y = var2CumHeight, x = -0.05)) + guides(fill=FALSE) + 
  scale_y_continuous(name="Ranking in period t+1") +
  scale_x_continuous(name="Ranking in period t") +
  theme_bw()

ggsave(
  "rankvar.png",
  p,
  width = 8,
  height = 6,
  dpi = 1200
)


#### Figure B
mh$rank3=relevel(as.factor(mh$ranking),ref=3)
mod = lmer(tax~(1|pid),data=mh)
summary(mod)
228.3/(228.3+607.6)
# ICC = 27.3%

mhFE =  ddply(mh, c("pid"), transform, taxFE = scale(tax,scale=FALSE))
modFE = lm(taxFE~rank3,data=mhFE)
summary(modFE)

favstats(tax~pid,data=mh)
mean(favstats(tax~pid,data=mh)$sd)

covar=ggplot(mhlead,aes(x=rankch,y=taxch))+ geom_point(alpha=0.3,position = position_jitter(w=0.1,h=0.1)) +
  stat_density2d(aes(alpha=..level..), geom="polygon") +
  geom_smooth(method = "loess", size = 1.5,color="black") + 
  #scale_alpha_continuous(limits=c(0,0.01))+
  theme_bw() + theme(legend.position="none") +
  scale_y_continuous(name="Change in tax vote from t to t+1 (% point)",limits=c(-101,101),breaks=seq(-100,100,20)) +
  scale_x_continuous(name="Change in rank from t to t+1",limits=c(-4.01,4.01),breaks=seq(-4,4,1)) 

ggsave(
  "covar.png",
  covar,
  width = 8,
  height = 6,
  dpi = 1200
)

# A.7

# Table C
mh$rank3=relevel(as.factor(mh$ranking),ref=3)
mod1sid = lmer(tax~tid.factor + (1|pid) + (1|sid),data=mh)
mod2sid = lmer(tax~tid.factor + rank3 + (1|pid) + (1|sid),data=mh)
mod3sid = lmer(tax~tid.factor + rank3 + female+inc+partymain_n+ ideology_n  + (1|pid) + (1|sid),data=mh)

# P-values are problematic for mixed models. Use Kenward-Roger approximation.
df.KR1 <- get_ddf_Lb(mod1sid, fixef(mod1sid))
p.KR1sid <- 2 * (1 - pt(abs(coef(summary(mod1sid))[,3]), df.KR1))

df.KR2 <- get_ddf_Lb(mod2sid, fixef(mod2sid))
p.KR2sid <- 2 * (1 - pt(abs(coef(summary(mod2sid))[,3]), df.KR2))

df.KR3 <- get_ddf_Lb(mod3sid, fixef(mod3sid))
p.KR3sid <- 2 * (1 - pt(abs(coef(summary(mod3sid))[,3]), df.KR3))

mod1sid.tr= extract.lme4(mod1sid, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod2sid.tr= extract.lme4(mod2sid, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod3sid.tr= extract.lme4(mod3sid, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)

# The following modifies the texreg output such that there is a line break in the custom note
body(texreg)[[48]] = substitute(if (is.null(custom.note)) {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 snote, "}}\n")
} else if (custom.note == "") {
  note <- ""
} else {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 custom.note, "}}\n")
  note <- gsub("%stars", snote, note, perl = TRUE)
})

texreg(list(mod1sid.tr,mod2sid.tr,mod3sid.tr),stars = c(0.05),naive=TRUE, label="tab:mix1",
       custom.note = "%stars. Standard errors in parentheses. \\citet{kenward97small} approximation for degrees of freedom used for hypothesis tests.",
       caption="Mixed Effects Models with Session Effects Included", caption.above=TRUE,booktabs=FALSE, dcolumn=FALSE,
       custom.coef.names = c("Constant", "Static", "Upturn","POUM","Downturn","PODM",
                             "Rank 1", "Rank 2", "Rank 4", "Rank 5", "Female",
                             "Family Income","Republican","Conservatism"),
       custom.gof.names=c("BIC","Observations","Subjects","Sessions","Subject Variance","Session Variance","Residual Variance"),
       single.row=TRUE,float.pos = "!h",
       override.pval = list(p.KR1sid,p.KR2sid,p.KR3sid))


# Table D
mod9sid = lmer(tax~tid.factor+(1|pid) + (1|sid),data=subset(mh,mh$ranking==1|mh$ranking==2))
mod10sid = lmer(tax~tid.factor +(1|pid) + (1|sid),data=subset(mh,mh$ranking==3))
mod11sid = lmer(tax~tid.factor +(1|pid) + (1|sid),data=subset(mh,mh$ranking==4|mh$ranking==5))
mod9sid.tr= extract.lme4(mod9sid, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod10sid.tr= extract.lme4(mod10sid, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod11sid.tr= extract.lme4(mod11sid, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)

# P-values are problematic for mixed models. Use Kenward-Roger approximation.
# http://mindingthebrain.blogspot.com/2014/02/three-ways-to-get-parameter-specific-p.html
# http://seriousstats.wordpress.com/tag/mcmc-methods/
df.KR9 <- get_ddf_Lb(mod9sid, fixef(mod9sid))
p.KR9sid <- 2 * (1 - pt(abs(coef(summary(mod9sid))[,3]), df.KR9))

df.KR10 <- get_ddf_Lb(mod10sid, fixef(mod10sid))
p.KR10sid <- 2 * (1 - pt(abs(coef(summary(mod10sid))[,3]), df.KR10))

df.KR11 <- get_ddf_Lb(mod11sid, fixef(mod11sid))
p.KR11sid <- 2 * (1 - pt(abs(coef(summary(mod11sid))[,3]), df.KR11))

body(texreg)[[48]] = substitute(if (is.null(custom.note)) {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 snote, "}}\n")
} else if (custom.note == "") {
  note <- ""
} else {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 custom.note, "}}\n")
  note <- gsub("%stars", snote, note, perl = TRUE)
})

texreg(list(mod9sid.tr,mod10sid.tr,mod11sid.tr),stars = c(0.05),naive=TRUE,label="tab:mix3",
       custom.note = "%stars. Standard errors in parentheses. \\citet{kenward97small} approximation for
       degrees of freedom used for hypothesis tests.",
       caption="Mixed Effects Models, by Rank, with Session Effects Included", caption.above=TRUE,booktabs=FALSE, dcolumn=FALSE,
       custom.model.names=c("Rank 1 and 2","Rank 3","Rank 4 and 5"),
       custom.coef.names = c("Constant", "Static", "Upturn","POUM","Downturn","PODM"),
       custom.gof.names=c("BIC","Observations","Subjects","Sessions","Subject Variance","Session Variance","Residual Variance"),
       single.row=TRUE,float.pos = "!h",
       override.pval = list(p.KR9sid,p.KR10sid,p.KR11sid))

# Table E
mh$rank3=relevel(as.factor(mh$ranking),ref=3)
mh.true = subset(mh,tid!=4&tid!=5)
mod1 = lmer(tax~tid.factor + (1|pid),data=mh.true)
mod2 = lmer(tax~tid.factor + rank3 + (1|pid),data=mh.true)
mod3 = lmer(tax~tid.factor + rank3 + female+inc+partymain_n+ ideology_n  + (1|pid),data=mh.true)

# P-values are problematic for mixed models. Use Kenward-Roger approximation.
df.KR1 <- get_ddf_Lb(mod1, fixef(mod1))
p.KR1 <- 2 * (1 - pt(abs(coef(summary(mod1))[,3]), df.KR1))

df.KR2 <- get_ddf_Lb(mod2, fixef(mod2))
p.KR2 <- 2 * (1 - pt(abs(coef(summary(mod2))[,3]), df.KR2))

df.KR3 <- get_ddf_Lb(mod3, fixef(mod3))
p.KR3 <- 2 * (1 - pt(abs(coef(summary(mod3))[,3]), df.KR3))

mod1.tr= extract.lme4(mod1, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod2.tr= extract.lme4(mod2, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod3.tr= extract.lme4(mod3, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)

# The following modifies the texreg output such that there is a line break in the custom note
body(texreg)[[48]] = substitute(if (is.null(custom.note)) {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 snote, "}}\n")
} else if (custom.note == "") {
  note <- ""
} else {
  note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                 custom.note, "}}\n")
  note <- gsub("%stars", snote, note, perl = TRUE)
})

texreg(list(mod1.tr,mod2.tr,mod3.tr),stars = c(0.05),naive=TRUE, label="tab:mix4",
       custom.note = "%stars. Standard errors in parentheses. The \\textsl{Control} condition is omitted from the model, meaning the coefficients for each treatment can be interpreted with respect to the \\textsl{Control} condition. \\citet{kenward97small} approximation for degrees of freedom used for hypothesis tests.",
       caption="Mixed Effects Models with ``Impure'' Controls Exluded", caption.above=TRUE,booktabs=FALSE, dcolumn=FALSE,
       custom.coef.names = c("Constant", "Static", "Upturn","POUM","Downturn","PODM",
                             "Rank 1", "Rank 2", "Rank 4", "Rank 5", "Female",
                             "Family Income","Republican","Conservatism"),
       custom.gof.names=c("BIC","Observations","Subjects","Subject Variance","Residual Variance"),
       single.row=TRUE,float.pos = "!h",
       override.pval = list(p.KR1,p.KR2,p.KR3))

# Table F
mod9 = lmer(tax~tid.factor+(1|pid),data=subset(mh.true,mh.true$ranking==1|mh.true$ranking==2))
mod10 = lmer(tax~tid.factor +(1|pid),data=subset(mh.true,mh.true$ranking==3))
mod11 = lmer(tax~tid.factor +(1|pid),data=subset(mh.true,mh.true$ranking==4|mh.true$ranking==5))
mod9.tr= extract.lme4(mod9, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod10.tr= extract.lme4(mod10, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)
mod11.tr= extract.lme4(mod11, naive=TRUE, include.aic=FALSE, include.bic =TRUE,include.loglik=FALSE,include.deviance=FALSE)

# P-values are problematic for mixed models. Use Kenward-Roger approximation.
df.KR9 <- get_ddf_Lb(mod9, fixef(mod9))
p.KR9 <- 2 * (1 - pt(abs(coef(summary(mod9))[,3]), df.KR9))

df.KR10 <- get_ddf_Lb(mod10, fixef(mod10))
p.KR10 <- 2 * (1 - pt(abs(coef(summary(mod10))[,3]), df.KR10))

df.KR11 <- get_ddf_Lb(mod11, fixef(mod11))
p.KR11 <- 2 * (1 - pt(abs(coef(summary(mod11))[,3]), df.KR11))

# The following modifies the texreg output such that there is a line break in the custom note
  body(texreg)[[48]] = substitute(if (is.null(custom.note)) {
    note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                   snote, "}}\n")
  } else if (custom.note == "") {
    note <- ""
  } else {
    note <- paste0("\\multicolumn{", length(models) + 1, "}{p{12cm}}{\\scriptsize{", 
                   custom.note, "}}\n")
    note <- gsub("%stars", snote, note, perl = TRUE)
  })

texreg(list(mod9.tr,mod10.tr,mod11.tr),stars = c(0.05),naive=TRUE,label="tab:mix3",
       custom.note = "%stars. Standard errors in parentheses. The \\textsl{Control} condition is omitted from the model, meaning the coefficients for each treatment can be interpreted with respect to the \\textsl{Control} condition. \\citet{kenward97small} approximation for
       degrees of freedom used for hypothesis tests.",
       caption="Mixed Effects Models, by Rank, with ``Impure'' Controls Excluded", caption.above=TRUE,booktabs=FALSE, dcolumn=FALSE,
       custom.model.names=c("Rank 1 and 2","Rank 3","Rank 4 and 5"),
       custom.coef.names = c("Constant", "Static", "Upturn","POUM","Downturn","PODM"),
       custom.gof.names=c("BIC","Observations","Subjects","Subject Variance","Residual Variance"),
       single.row=TRUE,float.pos = "!h",
       override.pval = list(p.KR9,p.KR10,p.KR11))


## A.8
# Manipulation checks
mh_sub=subset(mh,select=c(pid,man_a_n,man_c_n,man_d_n, man_e_n, man_f_n, man_pay_n, tid, tid_all))
mh_sub=unique(mh_sub)
mod1m=lm(as.numeric(man_a_n)~as.factor(tid_all), data=subset(mh_sub,tid!=8))
mod2m=lm(as.numeric(man_c_n)~as.factor(tid_all), data=subset(mh_sub,tid!=8))
mod3m=lm(as.numeric(man_d_n)~as.factor(tid_all), data=subset(mh_sub,tid!=8))
mod4m=lm(as.numeric(man_e_n)~as.factor(tid_all), data=subset(mh_sub,tid!=8))
mod5m=lm(as.numeric(man_f_n)~as.factor(tid_all), data=subset(mh_sub,tid!=8))
mod6m=lm(as.numeric(man_pay_n)~as.factor(tid_all), data=subset(mh_sub,tid!=8))

# Use results from mod 5 and mod 6
tindex = c(2,3,6,7)
mod5.plot = matrix(NA,length(tindex),5)
mod5.plot[,1] = tindex
mod5.plot[,2]=coef(mod5m)[-c(1)]
mod5.plot[,3:4]=confint(mod5m,level = 0.95)[-c(1),]
mod5.plot[,5]=rep("Model 1 - Expected Position",length(tindex))
mod5.plot = as.data.frame(mod5.plot)
colnames(mod5.plot) = c("tid","point.estimate","CI.lower","CI.upper","mod")

mod5.plot$tid2[mod5.plot$tid==2] = "Upturn"
mod5.plot$tid2[mod5.plot$tid==3] = "Downturn"
mod5.plot$tid2[mod5.plot$tid==6] = "POUM"
mod5.plot$tid2[mod5.plot$tid==7] = "PODM"

mod6.plot = matrix(NA,length(tindex),5)
mod6.plot[,1] = tindex
mod6.plot[,2]=coef(mod6m)[-c(1)]
mod6.plot[,3:4]=confint(mod6m,level = 0.95)[-c(1),]
mod6.plot[,5]=rep("Model 2 - Expected Payoff",length(tindex))
mod6.plot = as.data.frame(mod6.plot)
colnames(mod6.plot) = c("tid","point.estimate","CI.lower","CI.upper","mod")

mod6.plot$tid2[mod6.plot$tid==2] = "Upturn"
mod6.plot$tid2[mod6.plot$tid==3] = "Downturn"
mod6.plot$tid2[mod6.plot$tid==6] = "POUM"
mod6.plot$tid2[mod6.plot$tid==7] = "PODM"

# Combine tables 1a and 1b
tabc = rbind(mod5.plot,mod6.plot)
tabc$point.estimate = as.numeric(as.character(tabc$point.estimate))
tabc$CI.upper = as.numeric(as.character(tabc$CI.upper))
tabc$CI.lower = as.numeric(as.character(tabc$CI.lower))

zp1 <- ggplot(tabc,aes(linetype = mod)) 
zp1 <- zp1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 1, lwd=1)
zp1 <- zp1 + geom_pointrange(aes(x = tid2, y = point.estimate, ymin = CI.lower, shape=mod,
                                 ymax = CI.upper), lwd = 1, position = position_dodge(width = 1/2),colour="#000000")
zp1 <- zp1 + coord_flip() + theme_bw()
zp1 = zp1 + 
  # labs(y="Tax Rate (%) - Change from Control", x="") + 
  theme(axis.title = element_blank()) +
  scale_colour_brewer(name="Sample",palette="Set1") + 
  # theme(legend.position="bottom") +
  theme(legend.position="none") +
  theme(axis.text.y = element_text(size = rel(1))) +
  theme(axis.text.x = element_text(size = rel(1))) +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  theme(legend.text = element_text(size = rel(3/4),vjust = 1)) + 
  scale_y_continuous(breaks=c(-2,-1,0,1,2)) 



ggsave(
  "coefplot_mancheck.png",
  zp1,
  width = 8,
  height = 6,
  dpi = 1200
)

